
version 16.0

*Close any log left open by an earlier, interrupted run; otherwise the
*"log using" command below stops with a "log file already open" error.
capture log close

*Folder locations, all built from the global macro mypath.
*path1 is where the raw SAS files are imported from, path2 is where the
*intermediate and final .dta files are saved.
*Forward slashes are used because Stata accepts them on every operating system.
local path1 "${mypath}/CHNS_project/01_data/01_CHNS/"
local path2 "${mypath}/CHNS_project/01_data/02_posted/"

log using "${mypath}/CHNS_project/03_log_files/01_merge_relevant_datasets.log", replace

*1.basic survey information, including survey id, areas and age(n=143564)
import sas using "`path1'surveys_pub_12",clear
*newid = individual id followed by the wave, homeid = household id followed by the wave;
*both are built as strings and then converted back to numbers.
gen newid = string(Idind,"%20.0f") + string(wave)
gen homeid = string(hhid,"%20.0f") + string(wave)
destring newid homeid,replace
format newid homeid %20.0f
keep newid age wave Idind t1 t2 hhid homeid
*Everything except newid and age gets an _add_1 suffix so that these copies do not
*clash with the master file's own variables when the files are merged on newid.
*Built-in grouped rename replaces the user-written "renames" command.
rename (wave Idind t1 t2 hhid homeid) (wave_add_1 IDind_add_1 t1_add_1 t2_add_1 hhid_add_1 homeid_add_1)
sa "`path2'age.dta",replace

*2.basic personal information, including gender, birthdate and nationality(n=44453)
*This file is keyed by the individual id only (no wave), so no newid is built here.
import sas using "`path1'mast_pub_12", clear
*Align the id's spelling with the other files before it is used as a merge key.
rename Idind IDind
format IDind %20.0f
keep IDind GENDER NATIONALITY WEST_DOB_Y
save "`path2'gender_birthdate.dta", replace

*3.demographic information of family members, including marital status, ethnicity and cohabiting relationship(n=180582)
*All variables are kept: this file is later loaded as the master data set.
import sas using "`path1'rst_12", clear
rename WAVE wave
*Person-wave key (newid) and household-wave key (homeid): id followed by the wave.
generate newid  = string(IDind, "%20.0f") + string(wave)
generate homeid = string(hhid, "%20.0f") + string(wave)
destring newid homeid, replace
format IDind hhid newid homeid %20.0f
save "`path2'demography.dta", replace

*4.education information(n=134256)
import sas using "`path1'educ_12", clear
*Person-wave key: individual id followed by the wave, converted back to a number.
generate newid = string(IDind, "%20.0f") + string(WAVE)
destring newid, replace
format newid %20.0f
*Only the merge key and the education items are saved.
keep newid A11 A12 A13 A11A_93
save "`path2'education.dta", replace

*5.occupation information(n=117227)
import sas using "`path1'jobs_12", clear
*Person-wave key: individual id followed by the wave, converted back to a number.
generate newid = string(IDind, "%20.0f") + string(wave)
destring newid, replace
format newid %20.0f
*Remove the identifiers now encoded in newid, plus the items not carried into the merged file.
drop IDind wave hhid line COMMID T1 T2 T3 T4 T5 B3 B3A B8 B13 B7_93 B12_93
save "`path2'occupation.dta", replace

*6.personal wage and working time information(n=62029)
import sas using "`path1'wages_12", clear
*Person-wave key: individual id followed by the wave, converted back to a number.
generate newid = string(IDind, "%20.0f") + string(WAVE)
destring newid, replace
format newid %20.0f
sort newid C2
*Only waves from 1993 onwards are used.
drop if WAVE < 1993
*C2 is the index used for the reshape below; a missing C2 in the 1993 and 1997 waves is set to 1.
*NOTE(review): C2 presumably numbers a person's jobs within a wave - confirm against the codebook.
replace C2 = 1 if missing(C2) & inlist(WAVE, 1993, 1997)
*Flag for rows whose C2 equals 1.
generate employed_last = (C2 == 1)
drop IDind WAVE hhid line COMMID T1 T2 T3 T4 T5 C8_CLN I19_CLN WAGE89_IMP JOB
*One row per newid: every listed variable gets one column per value of C2.
reshape wide B2D-I19 C4_89-I103 employed_last, i(newid) j(C2)
save "`path2'wage.dta", replace

*7.household income information(n=43671)
import sas using "`path1'hhinc_10",clear
*Household-wave key: household id followed by the wave, converted back to a number.
gen homeid = string(hhid,"%20.0f") + string(WAVE)
destring homeid,replace
format homeid %20.0f
drop WAVE hhid t1 urban
*Prefix the CPI variables with h_ so they stay distinct from the i_ copies created
*for the individual income file. Built-in grouped rename replaces the user-written
*"renames" command.
rename (CPI1988 CPI2015) (h_CPI1988 h_CPI2015)
sa "`path2'household_income.dta",replace

*8.personal income information(n=88166)
*One interviewee reported the same income twice in the same year; only the first reply is retained.
import sas using "`path1'indinc_10",clear
*Person-wave key (newid) and household-wave key (homeid): id followed by the wave.
gen newid = string(IDind,"%20.0f") + string(wave)
gen homeid = string(hhid ,"%20.0f") + string(wave)
destring newid homeid,replace
format newid homeid %20.0f
*Drop the repeated observation(n = 1).
*Rows are numbered in file order first, because bysort alone leaves the order inside
*a newid group arbitrary; keeping _n == 1 within (newid, file order) therefore always
*retains the first reply and guarantees newid is unique for the later 1:1 merge.
tempvar file_order
gen long `file_order' = _n
bysort newid (`file_order'): keep if _n == 1
drop `file_order'
*Recode urban (1/0) into t2 coded 1 = urban, 2 = rural.
recode urban (1 = 1 "urban")(0 = 2 "rural"),gen(t2)
drop urban
*CPI variables get an i_ prefix; wave and IDind get an _add_2 suffix.
*NOTE(review): t1, t2, hhid and homeid are given the _add_1 suffix, the same names used
*in age.dta; the merge section of this file reads them under exactly these names.
*Built-in grouped rename replaces the user-written "renames" command.
rename (CPI1988 CPI2015 wave IDind t1 t2 hhid homeid) (i_CPI1988 i_CPI2015 wave_add_2 IDind_add_2 t1_add_1 t2_add_1 hhid_add_1 homeid_add_1)
sa "`path2'individual_income.dta",replace

*9.marital status for women(n=26596)
import sas using "`path1'wed_12", clear
*Person-wave key: individual id followed by the wave, converted back to a number.
generate newid = string(IDind, "%20.0f") + string(wave)
destring newid, replace
format newid %20.0f
*IDind is not in the drop list, so it stays in the saved file.
drop hhid line COMMID T1 T2 T3 T4 T5 wave
save "`path2'married_women.dta", replace

*10.social insurance, including health care, medical insurance and supplementary insurance(n=128728)
import sas using "`path1'ins_12", clear
*Person-wave key: individual id followed by the wave, converted back to a number.
generate newid = string(IDind, "%20.0f") + string(wave)
destring newid, replace
format newid %20.0f
*Remove the identifiers now encoded in newid together with the survey bookkeeping variables.
drop IDind hhid line COMMID T1 T2 T3 T4 T5 wave
save "`path2'insurance.dta", replace

*merge all data sets
*rst_12 (saved as demography.dta) is the master data set
cd "`path2'"
use demography.dta, clear

*merge file "surveys_pub_12"(basic survey information)
merge 1:1 newid using age.dta, nogenerate

*Rows that exist only in age.dta have no master values, so each master variable
*that is missing is filled from its _add_1 copy whenever that copy is present.
local master_vars wave IDind T1 T2 homeid hhid
local helper_vars wave_add_1 IDind_add_1 t1_add_1 t2_add_1 homeid_add_1 hhid_add_1
forvalues k = 1/6 {
    local target : word `k' of `master_vars'
    local source : word `k' of `helper_vars'
    replace `target' = `source' if missing(`target') & !missing(`source')
}

*merge file "mast_pub_12"(basic personal information)
*Many person-wave rows map to one person; only rows found in both files are kept.
merge m:1 IDind using gender_birthdate.dta, keep(matched) nogenerate

*merge files "educ_12"(education information), "jobs_12"(occupation information)
*and "wages_12"(personal wage and working time information), in that order,
*each one-to-one on newid and keeping unmatched rows from both sides
foreach person_file in education occupation wage {
    merge 1:1 newid using `person_file'.dta, nogenerate
}

*merge file "indinc_10"(personal income information)
*individual_income.dta stores its t1/t2/hhid/homeid copies under the same _add_1
*names that came in from age.dta. Without the update option, merge keeps the master's
*copy of a shared variable on matched rows, so the income file's values would be
*discarded there. The age.dta copies have already been used to fill the master
*variables, so they are dropped first to let the income file's copies come through.
drop t1_add_1 t2_add_1 homeid_add_1 hhid_add_1
merge 1:1 newid using individual_income.dta,nogen

*Fill master variables that are still missing from the income file's copies.
replace wave = wave_add_2 if mi(wave) & !mi(wave_add_2)
replace IDind = IDind_add_2 if mi(IDind) & !mi(IDind_add_2)
replace T1 = t1_add_1 if mi(T1) & !mi(t1_add_1)
replace T2 = t2_add_1 if mi(T2) & !mi(t2_add_1)
replace homeid = homeid_add_1 if mi(homeid) & !mi(homeid_add_1)
replace hhid = hhid_add_1 if mi(hhid) & !mi(hhid_add_1)

*merge file "hhinc_10"(household income information)
*Many person-wave rows share one household-wave record, hence m:1 on homeid.
merge m:1 homeid using household_income.dta, nogenerate
*Rows with no person key are dropped, e.g. household records that matched no person (1 observation).
drop if newid == .

*merge file "ins_12"(social insurance information), then file "wed_12"(marital status for women)
foreach person_file in insurance married_women {
    merge 1:1 newid using `person_file'.dta, nogenerate
}

*The helper copies were only needed to fill the master variables; remove them before saving.
drop wave_add_1 IDind_add_1 wave_add_2 IDind_add_2 t1_add_1 t2_add_1 hhid_add_1 homeid_add_1

save analysis1989_2015.dta, replace
log close
